In [1]:
# Load the per-model score table; keep text columns (e.g. the model name) as
# plain character vectors rather than factors.
score_data <- read.csv('../input/scores.csv', stringsAsFactors = FALSE)

# Show the table ordered by ascending leaderboard score (display only; the
# stored `score_data` keeps its original row order).
score_data[order(score_data$leaderboard_score), ]
Out[1]:
In [2]:
# Fit a linear model of the leaderboard score on the local validation metrics,
# then let stepwise selection (both directions) choose the final predictor set.
#
# step() refits candidate models and requires every candidate to use the same
# rows. With NAs left in the data and na.action = na.omit, dropping a predictor
# that contains NAs changes the number of usable rows and step() stops with
# "number of rows in use has changed: remove missing values?". Restricting the
# data to complete cases of the model variables up front avoids that, and gives
# exactly the same starting fit as na.omit on the full table would.
model_vars <- c("leaderboard_score", "accuracy", "logloss", "AUC", "f1", "mu", "std")
score_data_complete <- score_data[complete.cases(score_data[, model_vars]), , drop = FALSE]

lm.fit <- lm(leaderboard_score ~ accuracy + logloss + AUC + f1 + mu + std,
             data = score_data_complete,
             na.action = na.omit)  # no-op on complete cases; kept as a safeguard

# Stepwise selection starting from the full model.
slm.fit <- step(lm.fit, direction = "both")
summary(slm.fit)
Out[2]:
In [3]:
# Build a table comparing the stepwise model's predicted score with the actual
# leaderboard score for every model that can be scored.
#
# Rows are excluded when `std` is missing or when the model is one of the two
# entries treated as far outliers. The filter and the prediction are done in a
# single vectorised pass, so an empty table or a missing model name no longer
# raises an error, and no per-row temporaries are left in the global
# environment.

# Models treated as far outliers. Note the trailing space inside each literal --
# presumably this matches how the names are stored in the CSV; verify against
# the data before "tidying" these strings.
outlier_models <- c('RandomForestClassifier ', 'KNeighborsClassifier ')

scorable <- !is.na(score_data[, 'std']) &
  !(score_data[, 'model'] %in% outlier_models)
scored_rows <- score_data[scorable, , drop = FALSE]

models <- scored_rows[, 'model']
scores <- scored_rows[, 'leaderboard_score']

# predict() only reads the columns the fitted model needs; extra columns in
# `newdata` are ignored. unname() drops the row labels predict() attaches so
# the resulting data frame keeps automatic row numbering.
predictions <- round(unname(predict(object = slm.fit, newdata = scored_rows)), 4)

pred_v_act <- data.frame(models, scores, predictions)
pred_v_act
Out[3]:
In [4]:
# Scatter plot of predicted score (x) against actual leaderboard score (y),
# with each point labelled by its model name and a y = x reference line.
# Optional tweaks left disabled by the original author: par(pin=c(6,6)) before
# plotting, and xlim=c(0.25,1.8), ylim=c(0.25,1.8) on the plot() call.
library(car)
plot(x = pred_v_act$predictions,
     y = pred_v_act$scores,
     pch = 19,
     xlab = "Predicted Score",
     ylab = "Leaderboard (worse ->)",
     main = "Predicted Score v Leaderboard")
# Labels come from the `models` vector built alongside pred_v_act.
text(x = pred_v_act$predictions,
     y = pred_v_act$scores,
     labels = models,
     cex = 0.6)
# Identity line: points on it are predicted exactly.
abline(a = 0, b = 1)
In [5]:
# Render the score table (sorted by ascending leaderboard score) as a markdown
# table and print a fixed-width slice of every rendered line.
score_data <- score_data[order(score_data$leaderboard_score), ]
library(knitr)
foo <- kable(score_data, format = "markdown", digits = 4)

# Keep characters 5..52 of each rendered line -- presumably this trims the
# leading row-name column and limits the output to the first few columns;
# re-check these offsets if the table layout changes.
#
# cat() always returns NULL, so accumulating its return value (as the previous
# loop did) left `foof` NULL. Build the text with substr()/paste() instead and
# print it once; this also behaves correctly when `foo` is empty.
foof <- paste(substr(as.character(foo), 5, 52), collapse = "\n")
cat(foof, "\n", sep = "")
Out[5]:
In [ ]: